Load packages and set options.

Geographic Elk Movement Analysis

We begin by importing the elk data.

elk = read_csv("clean_data/elk.csv") |>
  mutate(elk_id = factor(elk_id)) 
Rows: 104913 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl  (9): elk_id, year, month, day, hour, lat, long, dist_km, land_cover
dttm (1): dt

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

The elk IDs are: 572, 595, 654, 656, 671, 706, 900, 902, 903, 907, 909, 911, 913, 914, 916, 917, 918, and we are going to follow them around Yellowstone! I found a cute picture of an elk in the snow here

Exploratory Analysis of Elk Migration

Some questions we want to answer about the elk:

Fun stuff I want to do - Make a moving map of the elk

I want to get a quick look of how the elk move across the map. Let’s plot where the elk are on the map for each year!

elk |>
  ggplot(aes(x = long, y = lat, color = elk_id)) + 
  geom_point(alpha = 0.3) +
  facet_grid(~ year) +  
  labs(
    title = "Elk Migration by Year",
    x = "longitude",
    y = "latitude",
    color = "Elk ID"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 75))

In 2013 the elk traveled much farther in longitude than the other years. In 2011, the elk did not go nearly as far East as in other years. In 2010 and 2011 there was more sparse travel and it looks like the elk liked to hang out in the far North and far South spots. The data in 2006 looks like it is not as rich and there might be missing data points here.

Let’s find out what the range of latitude and longitude was for the individual elk each year. I want to see the top 10 elk that moved the most. First let’s look at the top 10 movers for latitude.

elk |>
  group_by(elk_id, year) |>
  summarize(longitude_range = max(long) - min(long)) |>
  arrange(desc(longitude_range)) |>
  head(n= 10) |>
  knitr::kable(col.names = c("Elk ID", "Year", "longitude Range"))
`summarise()` has grouped output by 'elk_id'. You can override using the
`.groups` argument.
Elk ID Year longitude Range
917 2013 0.7775239
918 2014 0.5520892
706 2013 0.5067580
706 2012 0.4919770
916 2013 0.4790952
918 2013 0.4757999
917 2014 0.4695007
914 2013 0.4680962
913 2014 0.4584522
911 2014 0.4541982

We can see that our top 3 movers in terms of longitude are Elk 900, 917, and 916. Elk 917 shows up in this list 3 times for the years 2013, 2014, and 2015. This elk is exploring farther each year!

Now let’s look at the top movers in terms of latitiude.

elk |>
  group_by(elk_id, year) |>
  summarize(latitude_range = max(lat) - min(lat)) |>
  arrange(desc(latitude_range)) |>
  head(n= 10) |>
  knitr::kable(col.names = c("Elk ID", "Year", "latitude Range"))
`summarise()` has grouped output by 'elk_id'. You can override using the
`.groups` argument.
Elk ID Year latitude Range
900 2009 0.8055470
917 2015 0.8034336
916 2014 0.8032785
917 2014 0.8013303
654 2011 0.7895229
916 2013 0.7861957
914 2014 0.7851894
654 2010 0.7849657
917 2013 0.7742691
911 2013 0.7731564

Again we see elk 917 showing up as a top move in terms of latitude for the years 2013, 2014, and 2015 with the range going up each year.This elk is quite the explorer! We can also see that this elk has very similar results to elk 916 for 2013 and 2014. Maybe they move together? We will check this out later in the maps.

Now I want to look at how the total movement of each elk changes across the months of the year.

elk |>
  mutate(month = month(dt, label = TRUE)) |> 
  group_by(elk_id, year, month) |> 
  summarize(
    total_distance_km = sum(dist_km, na.rm = TRUE)
  ) |>
  ggplot(aes(x = elk_id, y = total_distance_km, fill = factor(month))) +
  geom_bar(stat = "identity") +  # Using bars to show total distance
  facet_grid(~factor(month)) +
  labs(
    title = "Total Movement by Elk and Month",
    x = "Elk_ID", 
    y = "Total Distance (km)", 
    fill = "Month") +
  theme(axis.text.x = element_text(angle = 90)) +
  theme(legend.position = 'none')
`summarise()` has grouped output by 'elk_id', 'year'. You can override using
the `.groups` argument.

It looks like there is quite a lot of variance in the movement of different elk. What is really interesting is that December is a big moving month! And November is right behind December in terms of total movement. I wonder if this is a pattern that elk have of moving when the temperature drops, or if there is something skewing the data like this.

Let’s look at the daily averages and how this compares for each of the elk. We are going to only look at the IQR so we can see what the movement is like on most days. It looks like most days they move around

daily_median = 
  elk |>
  mutate(date = date(dt)) |> 
  group_by(elk_id, date) |>
  summarize(daily_sum = sum(dist_km)) |>
  drop_na() |>
  pull(daily_sum) |>
  median()
`summarise()` has grouped output by 'elk_id'. You can override using the
`.groups` argument.
elk |>
  mutate(date = date(dt)) |> 
  group_by(elk_id, date) |>
  summarize(daily_sum = sum(dist_km)) |>
  ggplot(aes(x = elk_id, y = daily_sum)) +
  geom_boxplot(outliers = FALSE) +
  geom_hline(yintercept = daily_median, color = "blue") +
  labs(title = "Daily Averages (m)",
       x = "Elk ID",
       y = "Distance (m)")
`summarise()` has grouped output by 'elk_id'. You can override using the
`.groups` argument.
Warning: Removed 17 rows containing non-finite outside the scale range
(`stat_boxplot()`).

Now let’s look at how far they travel throughout the year.

#calculate the distance they travel each month
elk_seasonal = 
  elk |>
  mutate(
    season = case_when(
      month %in% c(12, 1, 2) ~ "Winter",
      month %in% c(3, 4, 5) ~ "Spring",
      month %in% c(6, 7, 8) ~ "Summer",
      month %in% c(9, 10, 11) ~ "Fall"
    )
  ) |>
  arrange(elk_id, dt)  # Ensure data is ordered

# Calculate distances and group by year, season, and elk_id
elk_distances_seasonal = 
  elk_seasonal |>
  group_by(elk_id, year, season) |>
  summarize(
    total_distance_km = sum(dist_km, na.rm = TRUE) 
  ) |>
  ungroup()
`summarise()` has grouped output by 'elk_id', 'year'. You can override using
the `.groups` argument.
# View results
elk_distances_seasonal |>
  pivot_wider(names_from = season, values_from = total_distance_km) |>
  knitr::kable(col.names = c("Elk ID", "Year", "Fall", "Spring", "Summer", "Winter"))
Elk ID Year Fall Spring Summer Winter
572 2006 358.1692 243.4525102 270.19248 115.6640
572 2007 262.7980 307.3771264 296.25650 266.9184
572 2008 NA 0.5268033 NA 102.2457
595 2007 309.8790 332.3771542 286.95792 209.7931
595 2008 NA 44.7942304 NA 147.0538
654 2010 458.5519 377.0839079 276.17203 105.0326
654 2011 248.2131 610.7469717 355.08460 271.0322
656 2010 101.2863 259.1596272 196.20043 NA
671 2012 297.2838 322.6824827 253.54017 126.0258
671 2013 257.3041 352.6952899 230.10628 261.9005
671 2014 NA NA NA 123.6739
706 2012 349.3021 560.6219499 394.64256 252.7928
706 2013 399.1241 594.4674206 466.60326 404.6597
706 2014 NA 84.5958876 NA 244.3947
900 2008 444.8889 NA 173.34661 191.9041
900 2009 410.9002 483.7385340 377.85262 306.8446
900 2010 NA 186.4186971 NA 158.6582
902 2008 531.6827 NA 171.93479 251.8201
902 2009 464.3840 522.5132624 345.27205 411.7242
902 2010 NA 179.1117077 NA 169.1765
903 2008 405.1701 NA 253.70199 152.7498
903 2009 378.3071 463.3102610 422.34843 275.0602
903 2010 NA 154.3125271 NA 147.2773
907 2013 456.0894 NA 95.12488 185.9591
907 2014 418.9940 575.0174816 269.30745 361.2967
907 2015 NA 457.7967286 25.33632 228.5525
909 2013 488.8248 NA 116.90919 248.0778
909 2014 313.6993 631.8496976 253.46676 437.7405
909 2015 NA 567.1980986 131.50990 292.2471
911 2013 372.7446 NA 114.29155 206.5388
911 2014 213.2257 613.1354415 206.22425 382.6545
913 2013 287.2265 NA 107.38545 186.2400
913 2014 311.1236 459.6227272 229.70137 313.1485
913 2015 NA 482.2381958 199.57717 134.8874
914 2013 419.0289 NA 93.43759 109.7464
914 2014 242.4557 456.2527138 199.99580 221.3847
916 2013 310.0486 NA 146.50213 282.4045
916 2014 305.6969 592.8954920 312.35485 476.8180
917 2013 330.8002 NA 91.84982 166.5031
917 2014 279.0076 538.8104139 269.01899 268.2958
917 2015 NA 408.4196419 192.93056 214.6738
918 2013 238.1352 NA 127.69952 143.4284
918 2014 288.7280 524.7055873 235.35097 309.8905

Now I am curious what the overlap of the elk are? The data collected is between 2006 and 2015, but we do not have data from all of the elk ids during that time period. It turns out that wild elk can only live up to 10-12 years, according to worlddeer.ord. So let’s find out what the time range is for each of these elk.

One thing I want to check is if the number of data points for each elk is skewing the data. Let’s see how the number of data points spread across the time frame.

elk |>
  group_by(elk_id) |>
  summarize(
    total_distance_km = sum(dist_km, na.rm = TRUE) 
  ) |>
  knitr::kable(digits = 0)
elk_id total_distance_km
572 2224
595 1331
654 2702
656 557
671 2225
706 3751
900 2735
902 3048
903 2652
907 3073
909 3482
911 2109
913 2711
914 1742
916 2427
917 2760
918 1868
# And maybe let's check how many data points we have

elk |>
  ggplot(aes(x = elk_id, fill = factor(year))) +
  geom_bar() +
  labs(title = "Total Data Points for Each Elk",
       x = "Elk ID",
       y = "# of data points",
       fill = "Year") 

We can see that the data is not consistent for for all of the elk across the 2006-2015 time period. However, there is a lot of overlap for 2 groups of elk. In further analysis we will use the 8 elk that overlap in 2013 and 2014.

elk |>
  group_by(elk_id) |>
  summarize(start_time = min(dt),
            end_time = max(dt)) |>
  knitr::kable()
elk_id start_time end_time
572 2006-03-01 18:01:00 2008-03-01 06:00:00
595 2007-01-16 18:01:00 2008-03-08 14:00:00
654 2010-03-16 15:00:00 2011-11-17 07:00:00
656 2010-03-16 15:00:00 2010-10-25 17:02:00
671 2012-03-22 16:00:00 2014-02-26 06:01:00
706 2012-01-24 10:30:00 2014-03-15 07:30:00
900 2008-07-13 00:01:00 2010-03-31 16:00:00
902 2008-07-09 06:00:00 2010-04-01 10:00:00
903 2008-07-09 04:01:00 2010-04-01 12:00:00
907 2013-07-16 10:01:00 2015-06-18 06:00:00
909 2013-07-16 10:01:00 2015-07-30 10:00:00
911 2013-07-16 18:00:00 2014-12-22 10:00:00
913 2013-07-16 14:01:00 2015-08-25 06:00:00
914 2013-07-16 14:00:00 2014-12-22 10:00:00
916 2013-07-16 14:01:00 2014-12-22 10:00:00
917 2013-07-16 18:01:00 2015-08-13 10:01:00
918 2013-07-16 18:00:00 2014-12-18 12:00:00
elk_df_2013.2014 = 
  elk |>
  filter(dt >= as_date("2013-07-16") &
         dt <= as_date("2014-12-30"),
         elk_id %in% c(907, 909, 911, 913, 914, 916, 917, 918))

The most overlapping data occurs between July 16th 2013 and December 30th, 2014. We have 8 elk that have data for this time range: 907, 909, 911, 913, 914, 916, 917, 918. Now let’s see how they move around!

Plot the seasonal data

elk_monthly = elk_df_2013.2014 |>
  mutate(
    month = month(dt, label = TRUE),  
  ) |>
  arrange(elk_id, dt)  

# Calculate distances and group by year, season, and elk_id
elk_distance_monthly = 
  elk_monthly |>
  group_by(elk_id, year, month) |>
  summarize(
    total_distance_km = sum(dist_km, na.rm = TRUE)
  ) |>
  ungroup() 
`summarise()` has grouped output by 'elk_id', 'year'. You can override using
the `.groups` argument.
elk_distance_monthly |>
  ggplot(aes(x = as.numeric(month), y = total_distance_km, color = elk_id)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  scale_x_continuous(
    breaks = 1:12,  # Numeric positions for each month
    labels = month.name  # Use month names as labels
  ) +
  labs(title = "Elk Movement by Month 2013-07-16 to 2014-12-30",
       x = "month",
       y = "total distance (km)")
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

It looks like May and December are when the elk are moving around the most.

I want to check if the number of observations for each month has an impact on these trends. I’m going to see if normalizing the data based on the number of observations has a significant impact on the trends.

elk_distance_normalized = 
  elk_monthly |>
  group_by(elk_id, year, month) |>
  summarize(
    total_distance_km = sum(dist_km, na.rm = TRUE),
    count = n()
  ) |>
  mutate(normal_distance = total_distance_km/count)
`summarise()` has grouped output by 'elk_id', 'year'. You can override using
the `.groups` argument.
elk_distance_normalized |>
  ggplot(aes(x = as.numeric(month), y = normal_distance, color = elk_id)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  scale_x_continuous(
    breaks = 1:12,  # Numeric positions for each month
    labels = month.name  # Use month names as labels
  ) +
  labs(title = "Elk Movement by Month 2013-07-16 to 2014-12-30",
       x = "month",
       y = "normalized distance")
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

The normalized data has a similar trend so we will continue with the raw data in km.

Let’s see where the elk are moving around in the month of May. Below is the elk mapped in Yellowstone park with their start and end points.

# Let's make a custom elk icon!!
elk_icon <- makeIcon(
  iconUrl = "pics/elk_icon.png", # Replace with the URL of your moose image
  iconWidth = 30, iconHeight = 30
)

filtered_data = elk_df_2013.2014 |>
  filter(month == 5)

# Create a color palette (limited to 9 elk IDs for "Set1")
elk_ids = unique(filtered_data$elk_id)  # Get unique elk IDs
num_colors = length(elk_ids)    # Ensure we don't exceed palette limit
path_colors = colorFactor(palette = RColorBrewer::brewer.pal(num_colors, "Set1"), domain = elk_ids)

# Initialize leaflet map
map <- filtered_data |>
  group_by(elk_id) |>
  summarize(start_long = first(long), start_lat = first(lat),
            end_long = last(long), end_lat = last(lat))|>
  ungroup() |>
  leaflet() |>
  addProviderTiles(providers$CartoDB.Positron, group = "Base Map") |>
  addProviderTiles(providers$Esri.NatGeoWorldMap, group = "NatGeo Map") |>
  addMarkers(lng = ~start_long, lat = ~start_lat, icon = elk_icon, popup = ~paste("Start Point: Elk", elk_id)) |>
  addMarkers(lng = ~end_long, lat = ~end_lat, icon = elk_icon, popup = ~paste("End Point: Elk ", elk_id))

# Add lines for each elk's path
for (id in elk_ids) {
  elk_data <- filtered_data |> filter(elk_id == id)  # Subset data for each elk
  map <- map |>
    addPolylines(
      data = elk_data,
      lng = ~long, lat = ~lat,
      color = path_colors(id),  # Assign unique color for each elk
      weight = 2,
      opacity = 0.8,
      label = ~paste("Elk ID:", id)  # Label showing elk ID
    )
}

# Add a legend for the elk IDs
map <- map |>
  addLegend(
    position = "topright",
    pal = path_colors,
    values = elk_ids,
    title = "Elk ID"
  )

# Print the map
map

From the leaflet we can tell that the elk all started somewhat close together and migrated northward. They all followed a similar path up to Jackson Lake and then some started to choose different paths. Elk 916 decided they liked it and stayed close to the lake for the rest of the month. Elk 911, 914, and 917 seemed to stay together from start until end. Maybe they are in the same heard. Cute!

Now let’s look at the paths for December because that was also a big movement month. There appears to be one elk, elk id 911 that traveled between Yellowstone National Park and the National elk refuge, whereas no other elk made the same journey in that month.

filtered_data = elk_df_2013.2014 |>
  filter(month == 12,
         year == 2014)

# Initialize leaflet map
map <- filtered_data |>
  group_by(elk_id) |>
  summarize(start_long = first(long), start_lat = first(lat),
            end_long = last(long), end_lat = last(lat))|>
  ungroup() |>
  leaflet() |>
  addProviderTiles(providers$CartoDB.Positron, group = "Base Map") |>
  addProviderTiles(providers$Esri.NatGeoWorldMap, group = "NatGeo Map") |>
  addMarkers(lng = ~start_long, lat = ~start_lat, icon = elk_icon, popup = ~paste("Start Point: Elk", elk_id)) |>
  addMarkers(lng = ~end_long, lat = ~end_lat, icon = elk_icon, popup = ~paste("End Point: Elk ", elk_id))

# Add lines for each elk's path
for (id in elk_ids) {
  elk_data <- filtered_data |> filter(elk_id == id)  # Subset data for each elk
  map <- map |>
    addPolylines(
      data = elk_data,
      lng = ~long, lat = ~lat,
      color = path_colors(id),  # Assign unique color for each elk
      weight = 2,
      opacity = 0.8,
      label = ~paste("Elk ID:", id)  # Label showing elk ID
    )
}

# Add a legend for the elk IDs
map <- map |>
  addLegend(
    position = "topright",
    pal = path_colors,
    values = elk_ids,
    title = "Elk ID"
  )

# Print the map
map

Land Cover Analysis

We read in the combined data set from all_data.csv. This data set contains the same geographic data as the elk.csv data along with land cover data, temperature, and water quality readings in that same geographic area. The data processing steps to create this file can be found in the data cleaning page. T

all_data = read_csv('clean_data/all_data.csv')
Rows: 104913 Columns: 78
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr   (2): location_id, location_name
dbl  (72): elk_id, year, month, day, hour, lat, long, dist_km, land_cover, t...
lgl   (3): monthly_mean_Turbidity FNU, monthly_min_Turbidity FNU, monthly_ma...
dttm  (1): dt

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

We also read in the land cover data for the entire region. The yellow regions of the map are fully covered, many cases by water or snow. The large yellow regions represent Jackson lake, Yellowstone Lake, and Heart Lake. The green areas represent land covered by an abundance of foliage or smaller bodies of water, such as creeks. The dark blue and purple regions represent land with scant foliage cover, such as large rocks.

small_land_coord = rast('clean_data/land_cover.tif')

plot(small_land_coord)

Below is a histogram of the distribution of land cover in the entire region. The most common land cover value is 137, representing sparse vegetation. The second most common value is 500, representing thick vegetation or grasses. The maximum value of 583 represents land covered by water.

land_coord_df = as.data.frame(small_land_coord)

land_coord_df |> 
  ggplot(aes(x = land_cover)) +
  geom_histogram()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Here is the distribution of land cover where elk were observed. It is clear that elk spend more of their time in land with cover between 300 and 500, but this plot is confounded by the distribution of land cover data in the region. In other words, there is much low-cover land, and elk may be forced to spend more time there than they would otherwise prefer.

all_data |> 
  ggplot(aes(x = land_cover)) +
  geom_histogram()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

In order to get a better understanding of the elk’s habits, we will plot the relative frequency of the time that they spend at each land cover value. We begin by binning each land cover value for the region and the elk to find the densities. We divide the elk density by the regional density of each bin. Plotting, we see that elk prefer to spend their time in the regions of middling land cover.

# Define bin breaks
land_cover_bins <- seq(min(land_coord_df$land_cover), max(land_coord_df$land_cover), length.out = 30)

# Bin the vectors
region_density <- cut(land_coord_df$land_cover, land_cover_bins, right = FALSE, labels = FALSE)
elk_density <- cut(all_data$land_cover, land_cover_bins, right = FALSE, labels = FALSE)

# Calculate sums within each bin
sum1 <- tapply(land_coord_df$land_cover, region_density, sum, na.rm = TRUE) / sum(land_coord_df$land_cover)
sum2 <- tapply(all_data$land_cover, elk_density, sum, na.rm = TRUE) / sum(all_data$land_cover)

# Divide the sums of corresponding bins
relative_land_cover =
  data.frame(
    land_cover_bins = land_cover_bins[-1], 
    density_elk = sum2 / sum1)

relative_land_cover |> 
ggplot( aes(x = land_cover_bins, y = density_elk)) +
  geom_bar(stat = "identity") +
  labs(title = "Relative Density of Elk by Land Cover", x = "Land Cover", y = "Elk Density") 

library(tidyterra)
Warning: package 'tidyterra' was built under R version 4.4.2

Attaching package: 'tidyterra'
The following object is masked from 'package:stats':

    filter
alice =
  all_data |> 
  select(elk_id, year, month, day, hour, lat, long, dist_km, land_cover) |> 
  filter(elk_id == 572)

scale_color_gradient

ggplot() +
  geom_spatraster(data = small_land_coord) +
  scale_fill_gradient2(low="white", high="darkgrey", guide="colorbar")
<SpatRaster> resampled to 501095 cells.

In this plot, we see that the elk spend most of their time in the low lying, well vegetated areas of Yellowstone.

ggplot() +
  geom_spatraster(data = small_land_coord) + 
  scale_fill_gradient2(low="white", high="darkgrey", guide="colorbar") + 
  geom_point(data = elk, aes(x=long, y=lat, color = 'red'), alpha = 0.3)
<SpatRaster> resampled to 501095 cells.

Zooming in on the crossing, we see that the elk take one of three paths between Yellowstone national park and the Elk reserve. The first, and furthest West goes between Jackson Lake on the West and Pilgrim mountain to the East. The middle path follows Pilgrim Creek. The East path follows Pacific Creek.

ggplot() +
  geom_spatraster(data = small_land_coord) + 
  geom_path(data = elk, aes(x=long, y=lat, color = 'red'), alpha = 0.7) +
  scale_fill_gradient2(low="white", high="darkgrey", guide="colorbar") + 
  ylim(43.75 ,44.25) + 
  xlim(-110.8, -110.2)
<SpatRaster> resampled to 501095 cells.
Warning: Removed 302 rows containing missing values or values outside the scale range
(`geom_path()`).

Analyzing Weather

We read in the weather data and aggregate the elk data by day.

Among the key factors that we considered to be potentially influential to elk migration was local weather patterns, specifically precipitation (including rain and snow) and average temperature. For this, we analyzed weather station data provided by NOAA National Centers for Environmental Information, utilizing daily weather records from 2006 to 2015 (to correspond with our elk migration data).

Selecting Appropriate Weather Stations

Given that there were numerous weather stations in the Yellowstone/Grand Teton area in Wyoming, several of which were contained within the various elk pathways we analyzed, we decided that the best way to effectively approximate the weather patterns across the entirety migration pathways would be to use the data provided by the four stations in the plots below, which span a wide coverage of the migration areas (shown in red).

Once we selected the appropriate weather stations and reduced our weather data set accordingly, we could then begin considering various weather-related research questions. These questions were grouped into two categories: analyzing the weather patterns visible in the study area over the 2006-2015 period, and relating the weather data with the elk migration data to see if there were any visible patterns and/or trends between the two.

Evaluating Weather Patterns and Trends

In the weather dataset, we were mainly concerned with four weather variables: prcp (precipitation), snow (snowfall), snwd (snow depth), and tavg (average temperature). Each of these variables were visualized using the same graph types. For precipitation, snowfall, and snow depth, the daily measurements among the four weather stations were first aggregated by station, year, and month, to show the monthly totals among each station throughout the 2006-2015 period. These data sets were further condensed into average monthly totals of precipitation, snowfall and snow depth among the four stations; this method provided us with an estimated sum of these three variables covering a wide swath of the study area. Average temperature was calculated by aggregating daily average temperature measurements by month and year and taking the mean of these values.

Starting with precipitation patterns, the plots below show relatively consistent patterns in monthly rainfall throughout the given year, in which the summer months often saw less precipitation out of all other seasons, whereas late winter through spring showed generally higher precipitation. While the exact months of these highs and lows vary per year, the general pattern shown each year is a U-shaped distribution from the start of the year to the end.

Warning in plot_ly(mutate(summarize(group_by(summarize(group_by(weather, : The group argument has been deprecated. Use `group_by()` or split instead.
See `help('plotly_data')` for examples
Warning: line.color doesn't (yet) support data arrays
Warning: line.color doesn't (yet) support data arrays
Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors

Perhaps not surprisingly, snowfall and snow depth were both highest in the winter and early spring months and non-existent in the summer months through early fall. There were slight differences between these two variables such that the highest snow depth measurements were visible for longer (more months out of the year) than when the highest snowfall measurements were visible. These patterns are apparent when comparing the plots side-by-side below. These snow patterns are expected for this area and help validate the reasonability of our weather data.

Warning in plot_ly(mutate(summarize(group_by(summarize(group_by(weather, : The group argument has been deprecated. Use `group_by()` or split instead.
See `help('plotly_data')` for examples
Warning in plot_ly(mutate(summarize(group_by(summarize(group_by(weather, : The group argument has been deprecated. Use `group_by()` or split instead.
See `help('plotly_data')` for examples

Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
Warning: textfont.color doesn't (yet) support data arrays
Warning: textfont.color doesn't (yet) support data arrays
Warning: textfont.color doesn't (yet) support data arrays
Warning: textfont.color doesn't (yet) support data arrays

Finally, when visualizing the monthly average temperatures (see below), we see a consistent pattern each year in which the monthly temperatures rise to their peaks in the summer months (July, specifically) and gradually decline after this peak, and the lowest temperatures occurred in January and December. As was the case with our snow data, this is a surprising pattern for Wyoming, and helps validate our data by ensuring that no unexpected shifts in seasonal patterns occurred over the years.

Warning in plot_ly(mutate(summarize(group_by(weather, year, month), year_month_tavg = mean(tavg, : The group argument has been deprecated. Use `group_by()` or split instead.
See `help('plotly_data')` for examples
Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
Warning: textfont.color doesn't (yet) support data arrays
Warning: textfont.color doesn't (yet) support data arrays

Analyzing Weather Data and Elk Migration Patterns

After examining the weather data by itself, we then visualized these datasets with the elk migration data to see if there were any visible patterns or trends that emerged. More specifically, we wanted to see if changes in each of the four weather measurements affected the total distance traveled by each elk on a given day, and we did so by plotting the total daily distance traveled as the dependent variable against each of the four weather measurements as the dependent variable.

Following the same order as the previous section, we first plotted the daily distance traveled by the elk against the daily precipitation recordings. In the scatterplot below, we see that the daily distance traveled varied more widely when the daily precipitation measurement was lowest, and that this variation decreased as precipitation levels increased. The smooth-mean line (shown in blue below) across all years appears to have a slight positive slope; this is also reflected in the smooth-mean line graph separated by each elk, which shows that several of the elk appear to increase their distance traveled as precipitation increases. Given that many of the other elk do not follow this same trend, and the smooth-mean lines appear to vary from one another, it is not clear to say whether precipitation increases saw an increase in distance traveled by elk.

Warning: Removed 143 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 143 rows containing non-finite outside the scale range
(`stat_smooth()`).

Similar to the precipitation plot, the snowfall scatterplot also shows greater variation of daily distance traveled among elk at lower snowfall measurements than at higher snowfall measurements. The smooth-mean trend (blue line) does not appear to have much of a positive or negative trend, and while there are only three elk available with associated snowfall data, the smooth-mean trends for each elk follow a similar pattern in which they traveled longer distances on days with lower snowfall, followed by a sharp decline in distance traveled on days with snowfall starting at between approximately 0.3 (Elk 706) and 1.2 in (Elk 654), followed by a gradual increase in distance traveled on days with snowfall starter at or greater than approximately 1.2 (Elk 706) and 3.5 in (Elk 654).

Warning: Removed 1296 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 1296 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: at -0.0575
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: radius 0.0033063
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: all data on boundary of neighborhood. make span bigger
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at -0.0575
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 0.0575
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 1
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: zero-width neighborhood. make span bigger
Warning: Failed to fit group 1.
Caused by error in `predLoess()`:
! NA/NaN/Inf in foreign function call (arg 5)
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: at -0.0575
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: radius 0.0033063
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: all data on boundary of neighborhood. make span bigger
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at -0.0575
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 0.0575
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 1
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: zero-width neighborhood. make span bigger
Warning: Failed to fit group 2.
Caused by error in `predLoess()`:
! NA/NaN/Inf in foreign function call (arg 5)
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at -0.05
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 0.55
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 2.0045e-30
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: There are other near singularities as well. 0.25
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: at -0.05
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: radius 0.0025
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: all data on boundary of neighborhood. make span bigger
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at -0.05
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 0.05
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 1
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: zero-width neighborhood. make span bigger
Warning: Failed to fit group 4.
Caused by error in `predLoess()`:
! NA/NaN/Inf in foreign function call (arg 5)
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: at -0.065
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: radius 0.004225
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: all data on boundary of neighborhood. make span bigger
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at -0.065
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 0.065
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 1
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: zero-width neighborhood. make span bigger
Warning: Failed to fit group 7.
Caused by error in `predLoess()`:
! NA/NaN/Inf in foreign function call (arg 5)
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: at -0.065
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: radius 0.004225
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: all data on boundary of neighborhood. make span bigger
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at -0.065
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 0.065
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 1
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: zero-width neighborhood. make span bigger
Warning: Failed to fit group 8.
Caused by error in `predLoess()`:
! NA/NaN/Inf in foreign function call (arg 5)
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: at -0.065
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: radius 0.004225
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: all data on boundary of neighborhood. make span bigger
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at -0.065
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 0.065
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 1
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: zero-width neighborhood. make span bigger
Warning: Failed to fit group 9.
Caused by error in `predLoess()`:
! NA/NaN/Inf in foreign function call (arg 5)
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: at -0.065
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: radius 0.004225
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: all data on boundary of neighborhood. make span bigger
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at -0.065
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 0.065
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 1
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: zero-width neighborhood. make span bigger
Warning: Failed to fit group 10.
Caused by error in `predLoess()`:
! NA/NaN/Inf in foreign function call (arg 5)
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: at -0.065
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: radius 0.004225
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: all data on boundary of neighborhood. make span bigger
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at -0.065
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 0.065
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 1
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: zero-width neighborhood. make span bigger
Warning: Failed to fit group 11.
Caused by error in `predLoess()`:
! NA/NaN/Inf in foreign function call (arg 5)
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: at -0.06
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: radius 0.0036
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: all data on boundary of neighborhood. make span bigger
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at -0.06
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 0.06
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 1
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: zero-width neighborhood. make span bigger
Warning: Failed to fit group 12.
Caused by error in `predLoess()`:
! NA/NaN/Inf in foreign function call (arg 5)
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: at -0.065
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: radius 0.004225
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: all data on boundary of neighborhood. make span bigger
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at -0.065
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 0.065
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 1
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: zero-width neighborhood. make span bigger
Warning: Failed to fit group 13.
Caused by error in `predLoess()`:
! NA/NaN/Inf in foreign function call (arg 5)
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: at -0.06
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: radius 0.0036
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: all data on boundary of neighborhood. make span bigger
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at -0.06
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 0.06
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 1
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: zero-width neighborhood. make span bigger
Warning: Failed to fit group 14.
Caused by error in `predLoess()`:
! NA/NaN/Inf in foreign function call (arg 5)
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: at -0.06
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: radius 0.0036
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: all data on boundary of neighborhood. make span bigger
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at -0.06
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 0.06
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 1
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: zero-width neighborhood. make span bigger
Warning: Failed to fit group 15.
Caused by error in `predLoess()`:
! NA/NaN/Inf in foreign function call (arg 5)
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: at -0.065
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: radius 0.004225
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: all data on boundary of neighborhood. make span bigger
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at -0.065
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 0.065
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 1
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: zero-width neighborhood. make span bigger
Warning: Failed to fit group 16.
Caused by error in `predLoess()`:
! NA/NaN/Inf in foreign function call (arg 5)
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: at -0.06
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: radius 0.0036
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: all data on boundary of neighborhood. make span bigger
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at -0.06
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 0.06
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 1
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: zero-width neighborhood. make span bigger
Warning: Failed to fit group 17.
Caused by error in `predLoess()`:
! NA/NaN/Inf in foreign function call (arg 5)

For snow depth, the overall mean-smooth trend does not show a clear positive or negative trend of distance traveled in response to changes in snow depth. However, when examining the mean-smooth trends of each elk , we see similar patterns among all elk, in which there is a rapid increase in distance traveled on days with lower snow depth (between 0 and ~10-15 in for most elk), followed by a rapid decrease in distance traveled among elk on days with snow depth between ~15 and 35-45 in for most elk, then rapid increases in distance traveled for snow depths beyond this point.

Warning: Removed 185 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 185 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at -0.16
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 1.16
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 0
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: There are other near singularities as well. 1

Lastly, after examining the relationship between daily distance traveled and average daily temperature, we can see a slight downward trend in daily distance traveled among elk as average daily temperature increases, as shown by the main blue mean-smooth line that shows a slightly negative overall slope. This negative relationship is visible in the other plot in which daily distance traveled in response to average temperature is shown for each elk. In this plot, we can see that in general, most elk traveled less on a given day as the average daily temperature increased, and some elk showed a slight uptick in daily distance traveled in the middle of the two temperature extremes (between approximately 25 and 40 degrees F for most elk), before a sharp decline in distance traveled on days with temperatures greater than this range.

`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Analyzing Water Quality

WRITE ABOUT THE SOURCE

# long format
water_quality2 = read_csv('clean_data/water_quality2.csv')
Rows: 3468 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): location_id, location_name, characteristic_name
dbl (5): year, month, monthly_mean, monthly_min, monthly_max

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
water_exploration = 
  water_quality2 |> 
  group_by(
    location_id,
    characteristic_name
  ) |> 
  summarize(
    all_time_mean = mean(monthly_mean, na.rm = TRUE),
    all_time_var = var(monthly_mean, na.rm = TRUE)) |> 
  pivot_wider(
    names_from = location_id,
    values_from = c(all_time_mean, all_time_var)
  ) |> 
  mutate(
    standardized_difference =  abs(all_time_mean_GRTE_SNR01 - all_time_mean_GRTE_SNR02) / sqrt(all_time_var_GRTE_SNR01 + all_time_var_GRTE_SNR02)
  )
`summarise()` has grouped output by 'location_id'. You can override using the
`.groups` argument.

The site GRTE_SNR01 has higher Chloride, Sodium, Sulfur, Arsenic, and Potassium than GRTE_SNR02.

To do: statistical test

water_exploration |> 
  arrange(desc(standardized_difference)) |> 
  dplyr::select(
    characteristic_name,
    all_time_mean_GRTE_SNR01,
    all_time_mean_GRTE_SNR02,
    standardized_difference
  ) |> 
  head(5)
# A tibble: 5 × 4
  characteristic_name              all_time_mean_GRTE_S…¹ all_time_mean_GRTE_S…²
  <chr>                                             <dbl>                  <dbl>
1 Chloride mg/l                                   12.2                   3.46   
2 Sodium mg/l                                     22.1                   7.39   
3 Sulfur, sulfate (SO4) as SO4 mg…                22.5                   8.37   
4 Arsenic mg/l                                     0.0300                0.00412
5 Potassium mg/l                                   3.26                  1.63   
# ℹ abbreviated names: ¹​all_time_mean_GRTE_SNR01, ²​all_time_mean_GRTE_SNR02
# ℹ 1 more variable: standardized_difference <dbl>

Examinging the data more closely, we see that GRTE_SNR02 has a consistently low level of these chemicals, while GRTE_SNR01 occasionally achieves higher levels of these chemicals.

water_quality2 |> 
  filter(characteristic_name %in% c(
    'Chloride mg/l', 'Sodium mg/l',
    'Sulfur, sulfate (SO4) as SO4 mg/l',
    'Potassium mg/l')) |> 
  ggplot(aes(fill = location_id, x = monthly_mean)) + 
  facet_wrap(.~ characteristic_name, scales = 'free_y')+
  geom_histogram()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Examining water quality by year. There was not any significant increase or decrease of these minerals at either location.

water_quality2 |> 
  filter(characteristic_name %in% c(
    'Chloride mg/l', 'Sodium mg/l',
    'Sulfur, sulfate (SO4) as SO4 mg/l',
    'Potassium mg/l')) |> 
  ggplot(
    aes(x = factor(year), y = monthly_mean, fill = location_id)
  ) + 
  facet_wrap(. ~ characteristic_name, scales = 'free_y') +
  geom_boxplot() + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Examining water quality by month. I can’t really interpret this graph due ot lack of data.

water_quality2 |> 
  filter(characteristic_name %in% c(
    'Chloride mg/l', 'Sodium mg/l',
    'Sulfur, sulfate (SO4) as SO4 mg/l',
    'Potassium mg/l')) |> 
  ggplot(
    aes(x = factor(month), y = monthly_mean, fill = location_id)
  ) + 
  facet_wrap(. ~ characteristic_name, scales = 'free_y') +
  geom_boxplot() + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

water_quality2 |> 
  group_by(
    location_id, 
    year,
    characteristic_name
  ) |> 
  summarize(vary = var(monthly_max, na.rm = TRUE)) |> 
  drop_na(vary) |> 
  arrange(desc(vary))
`summarise()` has grouped output by 'location_id', 'year'. You can override
using the `.groups` argument.
# A tibble: 605 × 4
# Groups:   location_id, year [35]
   location_id  year characteristic_name      vary
   <chr>       <dbl> <chr>                   <dbl>
 1 GRTE_SNR02   2010 Flow cfs            30812909.
 2 GRTE_SNR02   2017 Flow cfs            12534527.
 3 GRTE_SNR02   2018 Flow cfs            12479095.
 4 GRTE_SNR02   2011 Flow cfs            11657825 
 5 GRTE_SNR02   2009 Flow cfs             9671758.
 6 GRTE_SNR02   2014 Flow cfs             7600899.
 7 GRTE_SNR02   2019 Flow cfs             6499686.
 8 GRTE_SNR02   2016 Flow cfs             5532524.
 9 GRTE_SNR02   2021 Flow cfs             4550665.
10 GRTE_SNR02   2008 Flow cfs             4418100 
# ℹ 595 more rows

Water Temperature

Does water temperature effect migration?

unique(water_quality2$characteristic_name)
 [1] "Arsenic mg/l"                        
 [2] "Calcium mg/l"                        
 [3] "Chloride mg/l"                       
 [4] "Dissolved oxygen (DO) mg/l"          
 [5] "Flow cfs"                            
 [6] "Flow, severity (choice list)"        
 [7] "Magnesium mg/l"                      
 [8] "Nitrogen, ammonia as N mg/l"         
 [9] "Phosphorus as P mg/l"                
[10] "Phosphorus, orthophosphate as P mg/l"
[11] "Potassium mg/l"                      
[12] "Sodium mg/l"                         
[13] "Solids, Suspended (TSS) mg/l"        
[14] "Specific conductance uS/cm"          
[15] "Sulfur, sulfate (SO4) as SO4 mg/l"   
[16] "Temperature, air deg C"              
[17] "Temperature, water deg C"            
[18] "pH"                                  
[19] "Turbidity NTU"                       
[20] "Turbidity FNU"